# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

########################################################################
# OPENAPI-URI: /api/code/retention
########################################################################
# get:
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Factor'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#   - cookieAuth: []
#   summary: Shows retention metrics for a set of repos over a given period of time
# post:
#   requestBody:
#     content:
#       application/json:
#         schema:
#           $ref: '#/components/schemas/defaultWidgetArgs'
#   responses:
#     '200':
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Factor'
#       description: 200 Response
#     default:
#       content:
#         application/json:
#           schema:
#             $ref: '#/components/schemas/Error'
#       description: unexpected error
#   security:
#   - cookieAuth: []
#   summary: Shows retention metrics for a set of repos over a given period of time
#
########################################################################


"""
This is the code contributor retention factor renderer for Kibble
"""

import json
import time
import re
import datetime


def run(API, environ, indata, session):
    """Render the code contributor retention factor as a JSON document.

    Walks quarter by quarter from 1970 up to the last completed quarter
    boundary, asks the commit index which author e-mails were seen in each
    quarter, and tracks who joined, who stayed and who dropped out.

    This is a generator: nothing (including the login check) runs until the
    caller starts iterating.

    Parameters:
        API: object whose ``exception`` attribute is raised as
            ``API.exception(403, message)`` when nobody is logged in.
        environ: not used by this endpoint.
        indata: request arguments. Keys read here: ``view``, ``subfilter``,
            ``source`` and ``span`` (activity window in months, default 12).
        session: provides ``user``, ``getView``, ``subFilter``,
            ``DB.ES`` (``count``/``search``) and ``DB.dbname``.

    Yields:
        A single JSON string with the keys ``text``, ``timeseries``,
        ``counts``, ``averageYears``, ``okay`` and ``responseTime``.
    """

    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get("view"):
        viewList = session.getView(indata.get("view"))
    if indata.get("subfilter"):
        viewList = session.subFilter(indata.get("subfilter"), view=viewList)

    # By default, a contributor counts as active if they authored a commit
    # within the past 12 months.
    hl = indata.get("span", 12)
    activityWindow = hl * 30.45 * 86400  # months -> seconds (30.45 days/month)

    tnow = datetime.date.today()
    cy = tnow.year

    # Align the walk on quarter boundaries (month 3, 6, 9 or 12); months 1-2
    # map to 0, which wraps around to December.
    nm = tnow.month - (tnow.month % 3)
    if nm < 1:
        nm += 12
    ny = 1970  # the scan always starts in 1970

    # These do not change between quarters, so resolve them once.
    dOrg = session.user["defaultOrganisation"] or "apache"
    sourceFilter = None
    # Source-specific or view-specific??
    if indata.get("source"):
        sourceFilter = {"term": {"sourceID": indata.get("source")}}
    elif viewList:
        sourceFilter = {"terms": {"sourceID": viewList}}

    ts = []
    peopleSeen = {}  # start of the current activity stint, per author
    activePeople = {}  # end of the last quarter each author was seen in
    allPeople = {}  # end of the first quarter each author was ever seen in
    FoundSomething = False

    while ny < cy or (ny == cy and (nm + 3) <= tnow.month):
        # [t, tf] is the quarter under inspection.
        t = time.mktime(datetime.date(ny, nm, 1).timetuple())
        nm += 3
        if nm > 12:
            nm -= 12
            ny += 1
        if ny == cy and nm > tnow.month:
            # The quarter would end in a month that has not started yet.
            break
        tf = time.mktime(datetime.date(ny, nm, 1).timetuple())

        must = [
            {"range": {"tsday": {"from": t, "to": tf}}},
            {"term": {"organisation": dOrg}},
        ]
        if sourceFilter is not None:
            must.append(sourceFilter)
        query = {"query": {"bool": {"must": must}}}

        # Get an initial count of commits
        res = session.DB.ES.count(
            index=session.DB.dbname, doc_type="code_commit", body=query
        )

        # Skip the leading quarters that predate any recorded commit.
        if res["count"] == 0 and not FoundSomething:
            continue
        FoundSomething = True

        # Fetch up to 25000 distinct author e-mails for this quarter. Only
        # the author aggregation is requested because it is the only one
        # consumed below.
        query["aggs"] = {
            "by_author": {"terms": {"field": "author_email", "size": 25000}},
        }
        res = session.DB.ES.search(
            index=session.DB.dbname, doc_type="code_commit", size=0, body=query
        )

        added = 0
        for bucket in res["aggregations"]["by_author"]["buckets"]:
            who = bucket["key"]
            if who not in peopleSeen:
                # New, or returning after having been pruned earlier.
                peopleSeen[who] = tf
                added += 1
            activePeople[who] = tf
            if who not in allPeople:
                allPeople[who] = tf

        # Anyone whose last activity is older than the window has quit.
        cutoff = t - activityWindow
        prune = [who for who, lastSeen in activePeople.items() if lastSeen < cutoff]
        lost = len(prune)
        for who in prune:
            del activePeople[who]
            del peopleSeen[who]
        retained = len(activePeople) - added

        ts.append(
            {
                "date": tf,
                "People who (re)joined": added,
                "People who quit": lost,
                "People retained": retained,
                "Active people": added + retained,
            }
        )

    # Experience buckets: label and minimum age in seconds.
    groups = [
        ["More than 5 years", (5 * 365 * 86400) + 1],
        ["2 - 5 years", (2 * 365 * 86400) + 1],
        ["1 - 2 years", (365 * 86400)],
        ["Less than a year", 1],
    ]
    # Check the oldest bucket first so each person lands in exactly one.
    orderedGroups = sorted(groups, key=lambda x: x[1], reverse=True)

    counts = {}
    totExp = 0
    expNow = time.time()  # one snapshot so every person is measured alike
    for person in activePeople:
        firstSeen = allPeople[person]
        totExp += expNow - firstSeen
        for label, minAge in orderedGroups:
            if firstSeen <= expNow - minAge:
                counts[label] = counts.get(label, 0) + 1
                break
    avgyr = (totExp / (86400 * 365)) / max(len(activePeople), 1)

    ts = sorted(ts, key=lambda x: x["date"])

    # Round to whole months first, then split, so 1.99 years reads as
    # "2 years" rather than "1 year, 12 months".
    yr, ym = divmod(int(round(avgyr * 12)), 12)
    parts = []
    if yr >= 1:
        parts.append("%u year%s" % (yr, "s" if yr != 1 else ""))
    if ym > 0:
        parts.append("%u month%s" % (ym, "s" if ym != 1 else ""))
    # Never leave the sentence below dangling with an empty duration.
    avgm = ", ".join(parts) or "less than a month"

    JSON_OUT = {
        "text": "This shows Contributor retention as calculated over a %u month timespan. The average experience of currently active people is %s."
        % (hl, avgm),
        "timeseries": ts,
        "counts": counts,
        "averageYears": avgyr,
        "okay": True,
        "responseTime": time.time() - now,
    }
    yield json.dumps(JSON_OUT)
